To perform the tasks, it is necessary to import the libraries used in the script and download the data on which we will be working.
In this script we will be using:
# importing of libraries that will be used in the script
import cv2
import matplotlib.pyplot as plt
import numpy as np
import PIL
%matplotlib inline
from pandas import DataFrame
import pandas as pd
from IPython.display import display, HTML
from skimage.exposure import rescale_intensity
import plotly.graph_objects as go
import pandas as pd
pd.options.display.html.border = 0
pd.options.display.float_format = '{:,.2f}'.format
# download and unpack images
!wget -O lena_std.tif http://www.lenna.org/lena_std.tif
!wget -O bug.zip http://grail.cs.washington.edu/projects/photomontage/data/bug.zip && unzip -o bug.zip
The Colab platform requires a special way to display images with OpenCV. If the notebook is run in Colab, execute the following code:
# Colab has no GUI, so cv2.imshow cannot open a window there; the
# google.colab patch renders images inline in the notebook instead.
if 'google.colab' in str(get_ipython()):
    from google.colab.patches import cv2_imshow
    imshow = cv2_imshow
else:
    # NOTE(review): plain cv2.imshow expects (winname, mat), but callers in
    # this notebook pass only the image — outside Colab these calls would
    # need a window name; verify before running locally.
    imshow = cv2.imshow
def h_color(a, interpolation=None):
    """Render a 2D array as a grayscale matplotlib image without axis ticks.

    The figure size is proportional to the array shape (2 units per cell).
    `interpolation` is forwarded to plt.imshow (e.g. 'nearest', None).
    """
    fig_size = [a.shape[0] * 2, a.shape[1] * 2]
    plt.figure(figsize=fig_size)
    # hide every tick and tick label on both axes
    hidden = dict(
        bottom=False, top=False, left=False, right=False,
        labelbottom=False, labelleft=False,
    )
    plt.tick_params(axis='both', which='both', **hidden)
    plt.imshow(a, cmap="gray", interpolation=interpolation)
# CSS prepended to DataFrame HTML by the helper `h` below, so matrices
# render as a compact grid of fixed-size, centered cells.
css = """
<style type="text/css">
table, td, table.dataframe, table.dataframe td {
border: 1px solid black; //border: double;
border-collapse: collapse;
border-style: solid;
border-spacing: 0px;
background-color: rgb(250,250,250);
width: 24px;
height: 24px;
text-align: center;
transform: scale(1.0);
margin: 5px;
}
</style>
"""
def h(s):
    """Display array/list `s` as a styled HTML table (uses module-level `css`)."""
    return display(HTML(css + DataFrame(s).to_html(header=False, index=False)))
def h_color_3d(z):
    """Show a 2D array `z` as an interactive 3D surface plot (plotly)."""
    surface = go.Surface(z=z)
    figure = go.Figure(data=[surface])
    figure.update_layout(autosize=False, width=500, height=500)
    figure.show()
A convolution is an operation on two functions $f$ and $g$, the result of which is a third function $(f \ast g)$. The convolution can be described by the following formula:
$$(f \ast g)(t) = \int_{-\infty}^{\infty} f(\tau)g(t - \tau)d\tau$$As above:
In image processing we will consider a discrete convolution operation where one function is an image and the other function is a mask or a kernel.
This entails some issues:
the result of a single convolution operation is a scalar, so convolution can be used for every 2nd, 3rd, etc. $t$ (e.g. $t \in [0, 2, 4, 6, ...]$). This parameter is called a step (stride). The result of the convolution with step 2 will be a new function with 2 times smaller domain,
we can use a similar operation while matching the function $g$ to the function $f$: $$(f \ast g)(t) = \int_{-\infty}^{\infty} f(\alpha\tau)g(t - \tau)d\tau$$ where $$\alpha \in [1, 2,...]$$ This operation is called a dilation.
Author: Vincent Dumoulin, Francesco Visin
Article: A guide to convolution arithmetic for deep learning
URL: www.github.com
Blue image – input image; green image – output image
The gray area moving across the input image is function $g$. The $f$ function is a blue image, and the result is a green area (which is $f \ast g$).
For the given tables $f$ and $g$ perform the convolution operation. Apply zero padding, unit stride, and a dilation of 1.
$f = [1, 2, 3]$
$g = [1, 0, 1]$
1. After padding:
$f = [0, 1, 2, 3, 0]$
2. Convolution:
Only for $t \in [0, 1, 2]$
$(f \ast g)(0) = 0 * 1 + 1 * 0 + 2 * 1 = 2$
$(f \ast g)(1) = 1 * 1 + 2 * 0 + 3 * 1 = 4$
$(f \ast g)(2) = 2 * 1 + 3 * 0 + 0 * 1 = 2$
$f \ast g = [2, 4, 2]$
NOTE: the filter2D function was used, which performs the 2D convolution.
# Example signals from the exercise above; stored as 1xN 2D arrays because
# cvconv / cv2.filter2D operate on 2D data.
f = np.array([[1, 2, 3]], np.uint8)
g = np.array([[1, 0, 1]], np.uint8)
def cvconv(f, g):
    """'Same'-size 2D convolution of image `f` with kernel `g` via OpenCV.

    Zero padding is added manually, cv2.filter2D is applied, and the padding
    is cropped away so the result has the shape of `f`.  Note that filter2D
    computes cross-correlation (the kernel is not flipped); this differs
    from true convolution only for asymmetric kernels.
    """
    # padding sizes for an odd-sized kernel
    top = (g.shape[0] - 1) // 2
    left = (g.shape[1] - 1) // 2
    padded = cv2.copyMakeBorder(f, top, top, left, left, cv2.BORDER_CONSTANT, 0)
    # match the kernel dtype so integer examples keep integer arithmetic
    filtered = cv2.filter2D(padded.astype(g.dtype), -1, g)
    # crop the border back off (OpenCV keeps the padded size)
    rows, cols = padded.shape[0], padded.shape[1]
    return filtered[top:rows - top, left:cols - left]
# display the input, the kernel and the convolution result as HTML tables
h(f)
h(g)
h(cvconv(f, g))
Convolution also occurs in other libraries:
Implement a function that for arguments f and g will perform a convolution operation. The function should handle padding, stride = 1, without dilation.
Note 1: Be sure not to modify f on the fly. This leads to wrong end results.
Note 2: The implementation operation should be adapted to the 2D data.
Note 3: Try to use the matrix operations presented in lab 1.
import copy as cp
def conv(f, g):
    """2D convolution of image `f` with kernel `g`, 'same' output size.

    Zero padding keeps the output shape equal to `f.shape`; stride is 1 and
    there is no dilation.  Like ``cv2.filter2D`` (and `cvconv` above) this
    actually computes cross-correlation — the kernel is not flipped — which
    is identical to convolution for symmetric kernels.

    The result is cast to ``f.dtype``, so integer inputs truncate/wrap
    exactly as in the original example.

    Fixes over the previous version: the old element-by-element padding
    loop indexed `f` out of bounds for kernels taller than 1 row, and the
    spurious ``break`` conditions left border rows/columns of the output
    unfilled for kernels larger than the 1x3 example.
    """
    pad_v = (g.shape[0] - 1) // 2
    pad_h = (g.shape[1] - 1) // 2
    # Zero-padded copy of f, built with one vectorized assignment.
    padded_arr = np.zeros((f.shape[0] + 2 * pad_v, f.shape[1] + 2 * pad_h))
    padded_arr[pad_v:pad_v + f.shape[0], pad_h:pad_h + f.shape[1]] = f
    # Fresh output buffer — never mutate f in place.
    conv_arr = np.zeros_like(f)
    for i in range(f.shape[0]):
        for j in range(f.shape[1]):
            window = padded_arr[i:i + g.shape[0], j:j + g.shape[1]]
            conv_arr[i, j] = np.sum(np.multiply(window, g))
    return conv_arr
# Verify the manual implementation on the same example; expected [2, 4, 2].
f = np.array([[1, 2, 3]], np.uint8)
g = np.array([[1, 0, 1]], np.uint8)
fg = conv(f, g)
h(f)
h(g)
h(fg)
For some function $ f (x) $ and the point $ x_0 $ for which environment $ f $ is defined. The derivative of the $ f $ function can be defined as:
$$f'(x_0) = \lim_{x \rightarrow x_0} \frac{f(x) - f(x_0)}{x - x_0}$$In other words, the derivative of the function determines how much the value of the $ f $ function changes around a certain point $ x_0 $.
In practice, having a certain sample of data coming from a certain function, we only see a digitized form of this function. Then, the expression $ \lim_{x \rightarrow x_0} $ reduces to taking the value nearest to $ x_0 $ that we have.
Example:
For $x = \{0, 1, 2, 3\}$
and $f(x) = \{3, 2, 1, 0\}$:
$f'(x_0) = \frac{2 - 3}{1 - 0} = -1$
$f'(x_1) = \frac{1 - 2}{2 - 1} = -1$
$f'(x_2) = \frac{0 - 1}{3 - 2} = -1$
The above data comes from the function $ f (x) = -x + 3$, for which we can analytically compute the derivative $ f '(x) = -1 $, which agrees with the empirically calculated derivatives in points.
A simple calculation of gradients in practice comes down to the convolution operation between a certain function $ f $ and the function $ g = [-1, 1] $.
For the sine function, let's take samples with a step 0.01 of function values from the range $ [0, 4 * \pi] $.
# Sample sin(x) on [0, 4*pi] with step 0.01 and plot it.
t = np.arange(0.0, 4.0 * np.pi, 0.01)
s = np.sin(t)
plt.plot(t, s)
plt.show()
Let's calculate the derivative ($ \frac {f (x) - f (x_0)} {x - x_0} $) in two steps:
# Numerical derivative: convolve with g = [-1, 1] (finite difference of
# neighboring samples) and divide by the sampling step t[1] - t[0] = 0.01.
der = np.array([[-1, 1]], np.float32)
s_hat = cvconv(s[np.newaxis], der) / (t[1] - t[0])
s_hat = s_hat[0]  # back from 1xN to 1D
plt.plot(t, s)
plt.plot(t, s_hat)
plt.show()
The above operation resulted in a graph of the $ y = \cos (x) $ function, which is true (analytically, the derivative of $ \sin $ is $ \cos $).
Using the above implementation of the derivative, find the extremes of the following function:
$$y = (x+3)x(x-1)(x-4)(x-7)$$Note: due to numerical errors, consider values close to 0.0 as zero. The correct result will be to print a list of arguments for which the derivative is close to zero.
# Find the extremes of y = 0.001*(x+3)x(x-1)(x-4)(x-7): points where y' ~ 0.
# The 0.001 factor only scales the plot; it does not move the roots of y'.
der = np.array([[-1, 1]], np.float32)
x = np.arange(-3.5, 7.5, 0.001)
y = 0.001 * (x+3)*x*(x-1)*(x-4)*(x-7)
y_hat = cvconv(y[np.newaxis], der) / (x[1] - x[0])
y_hat = y_hat[0]
# numerical noise: treat |y'| < atol as zero
extremes = np.isclose(y_hat, 0, atol=0.0003)
print(x[extremes])
print(y[extremes])
plt.plot(x, y, label='f(x)')
# skip the first samples, where the finite difference is distorted by padding
plt.plot(x[2:], y_hat[2:], label="f'(x)")
plt.grid()
plt.show()
We identified images as 2-dimensional functions. This means that we can also count derivatives for them.
The derivative for 2D images has an important property: for changes in the function (image) value, i.e. for places where the pixel intensity is different, the derivative will return a value other than zero.
Let's define a simple binary image, shown below.
# Simple binary test image: a bright blob on a dark background.
s = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 1, 1, 1, 0, 0, 0, 0],
    [0, 1, 1, 1, 1, 1, 0, 0],
    [0, 1, 1, 1, 1, 1, 0, 0],
    [0, 1, 1, 1, 1, 1, 0, 0],
    [0, 1, 1, 1, 1, 1, 0, 0],
    [0, 1, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0]
], np.float32)
# view the pixel intensities as a 3D surface
h_color_3d(s)
Then, we can define two functions that calculate the derivative for horizontal and vertical data, respectively:
# First-order finite-difference kernels, horizontal and vertical.
der_h = np.array([[-1, 1]], np.float32)
der_v = np.array([[-1], [1]], np.float32)
# NOTE(review): dividing by (t[1] - t[0]) reuses the 0.01 step from the
# sine example above; pixel spacing is 1, so this merely scales the whole
# surface by 100 — confirm this is intentional.
h_color_3d(cvconv(s, der_h) / (t[1] - t[0]))
h_color_3d(cvconv(s, der_v) / (t[1] - t[0]))
To better detect areas where the color intensity differences are greater, you can find areas where the 1st order derivative quickly increases / decreases. This is where the edges of the objects in the image lie.
To calculate such places, we can calculate the derivative of the calculated derivative, i.e. the derivative of the 2nd order.
The formula for the derivative:
$$f'(x_0) = \lim_{x \rightarrow x_0} \frac{f(x) - f(x_0)}{x - x_0}$$Images are functions with unit-varying arguments (e.g. pixel with position (0,0), (0,1), (0, ...). This means that: $$ \lim_{x \rightarrow x_0} x - x_0 = 1$$ So we can write the derivative for the image as:
$$f'(x_0) = \frac{f(x_1) - f(x_0)}{x_1 - x_0} = f(x_1) - f(x_0)$$Then the 2nd order derivative takes the form:
$$f''(x_0) = (f(x_1) - f(x_0))'$$$$f''(x_0) = f'(x_1) - f'(x_0)$$$$f''(x_0) = f(x_2) - f(x_1) - f(x_1) + f(x_0)$$$$f''(x_0) = f(x_2) -2 f(x_1) + f(x_0)$$The above formula can be realized using the convolution with the function $ g = [1, -2, 1] $.
Let's define an image whose edges are not so visible.
# Image whose edges are gradual rather than sharp.
s = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 1, 1, .8, .5, .1, 0, 0],
    [0, 1, 1, .8, .6, .2, 0, 0],
    [0, 1, 1, 1, .7, .3, .05, 0],
    [0, 1, 1, 1, .7, .3, .05, 0],
    [0, 1, 1, 1, .5, .3, 0, 0],
    [0, 1, 1, 1, .3, .1, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0]
], np.float32)
# Second-order derivative kernels g = [1, -2, 1], horizontal and vertical
# (derived above from the double finite difference).
der_h = np.array([
    [1, -2, 1]
], np.float32)
der_v = np.array([
    [1],
    [-2],
    [1]
], np.float32)
h_color_3d(s)
h_color_3d(cvconv(s, der_h))
h_color_3d(cvconv(s, der_v))
Performing the derivative of the 2nd order horizontally we use the function $ g = [[-1, 2, -1]] $ (the negation of the `der_h` kernel defined above — both sign conventions detect the same edges), which has the size (1, 3). Its operation is equal to convolution with the function:
$$g_h = \begin{bmatrix} 0 & 0 & 0\\ -1 & 2 & -1\\ 0 & 0 & 0\\ \end{bmatrix}$$Similarly for the 2nd order derivative vertically:
$$g_v = \begin{bmatrix} 0 & -1 & 0\\ 0 & 2 & 0\\ 0 & -1 & 0\\ \end{bmatrix}$$Both functions can be combined with the addition operation, obtaining the function:
$$g = \begin{bmatrix} 0 & -1 & 0\\ -1 & 4 & -1\\ 0 & -1 & 0\\ \end{bmatrix}$$This is called the Laplacian operator, which can be used to detect edges vertically and horizontally simultaneously.
# Laplacian kernel: the sum of the (negated) 2nd-order horizontal and
# vertical derivative kernels — responds to edges in both directions.
der = np.array([
    [0, -1, 0],
    [-1, 4, -1],
    [0, -1, 0]
], np.float32)
h_color_3d(cvconv(s, der))
# absolute value: edge strength regardless of the sign of the response
h_color_3d(np.abs(cvconv(s, der)))
The OpenCV library contains ready-made implementations of many convolutional operations. The most popular are:
The results of their implementation in OpenCV are presented below.
# Load Lenna (BGR), convert to grayscale, and demo OpenCV's built-in filters.
img = cv2.imread('./lena_std.tif', 1)
img_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
imshow(img_grayscale)
img_blur = cv2.blur(img_grayscale, (3, 3))  # box (mean) blur
img_gaussian_blur = cv2.GaussianBlur(img_grayscale, (5, 5), 0)
img_laplacian = cv2.Laplacian(img_grayscale, cv2.CV_32F)  # float32 output
img_sobel = cv2.Sobel(img_grayscale, cv2.CV_64F, 1, 0, ksize=3)  # d/dx
img_median = cv2.medianBlur(img_grayscale, 5)
img_bilateral = cv2.bilateralFilter(img_grayscale, 9, 75, 75)  # edge-preserving
# NOTE(review): these concatenations mix uint8 and float images, so numpy
# upcasts the result to float — verify the display helper renders that.
imshow(np.concatenate([img_blur, img_gaussian_blur, img_laplacian], 1))
imshow(np.concatenate([img_sobel, img_median, img_bilateral], 1))
Prepare the $g_1 $ and $ g_2 $ functions for the blur and gaussian blur operations respectively.
Perform a convolution operation with these functions on the image "Lenna" in RGB format.
Note: Use only numpy and python.
def blur_kernel(size):
    """Return a (size, size) box-blur kernel whose entries sum to 1."""
    weight = 1 / size ** 2
    return np.full((size, size), weight)
def dnorm(x, mu, sd):
    """Density of the normal distribution N(mu, sd**2) evaluated at x.

    Built from numpy ufuncs, so it works element-wise on arrays as well
    as on scalars.
    """
    return 1 / (np.sqrt(2 * np.pi) * sd) * np.e ** (-np.power((x - mu) / sd, 2) / 2)

def gaussian_kernel(size, sig=3.0):
    """Return a (size, size) Gaussian kernel with standard deviation `sig`.

    The kernel is the outer product of a sampled 1D Gaussian with itself,
    normalized so its maximum (center) value is 1.0.  Note this is peak
    normalization, not sum normalization, so convolving with it brightens
    the image (the notebook rescales intensities afterwards).

    Improvement: the former element-by-element loop over `vec` is replaced
    with a single vectorized `dnorm` call (identical values, idiomatic
    numpy); the no-op `.T` on the 1D vector is dropped.
    """
    vec = np.linspace(-(size // 2), size // 2, size)
    vec = dnorm(vec, 0, sig)
    kernel = np.outer(vec, vec)
    return kernel * 1.0 / kernel.max()
# Blur the color image with the handcrafted kernels (filter2D applies the
# same 2D kernel to every channel independently).
g1 = blur_kernel(11)
g2 = gaussian_kernel(21)
img_g1 = cvconv(img, g1)
img_g1 = img_g1.astype(np.uint8)
img_g2 = cvconv(img, g2)
# gaussian_kernel is peak-normalized (its sum is > 1), so rescale the
# brightened result back into range before display
img_g2 = rescale_intensity(img_g2) * 255.0
img_g2 = img_g2.astype(np.uint8)
imshow(np.concatenate([img, img_g1, img_g2], 1))
A practical use of the Laplacian is edge detection. By defining the $ g $ function as:
$$g = \begin{bmatrix} 0 & -1 & 0\\ -1 & 4 & -1\\ 0 & -1 & 0\\ \end{bmatrix}$$and by applying convolution on the image in the grayscale, we can perform simple edge detection.
# Edge detection: Laplacian kernel applied to the normalized grayscale image.
g = np.array([
    [0, -1, 0],
    [-1, 4, -1],
    [0, -1, 0]
], np.float32)
img_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255.0  # scale to [0, 1]
img_edges = cv2.filter2D(img_grayscale, -1, g)
imshow(np.concatenate([img_grayscale, img_edges], 1) * 255.0)
Knowing that Laplasjan returns values close to zero for pixels around which there is little change in intensity and extreme values for areas with high change in pixel intensity (both negative and positive values) we can take the absolute values of the result, thus finding the edges.
# Edge strength is |Laplacian| (both positive and negative responses mark
# edges); rescale to [0, 1] for display.
img_edges = np.abs(img_edges)
img_edges = rescale_intensity(img_edges)
imshow(np.concatenate([img_grayscale, img_edges], 1) * 255.0)
Note:
The 2nd order derivative by definition returns the extreme values for areas of high variability in intensity. This feature characterizes not only the edges but also the areas of high sharpness.
The ant challenge is an image reconstruction problem from multiple intermediate images that contain details relevant to the result. The data includes pictures of the ant (in relatively the same pose) with focus at different distances from the lens.
The task is to propose an image reconstruction algorithm that will contain the sharpest areas from individual frames.
Tips:
import copy
# Focus-stack frames of the ant, each sharp at a different depth.
# (The tripled 'p' in "croppped" matches the actual file names in the zip.)
files = [
    './bug/b_bigbug0000_croppped.png',
    './bug/b_bigbug0001_croppped.png',
    './bug/b_bigbug0002_croppped.png',
    './bug/b_bigbug0003_croppped.png',
    './bug/b_bigbug0004_croppped.png',
    './bug/b_bigbug0005_croppped.png',
    './bug/b_bigbug0006_croppped.png',
    './bug/b_bigbug0007_croppped.png',
    './bug/b_bigbug0008_croppped.png',
    './bug/b_bigbug0009_croppped.png',
    './bug/b_bigbug0010_croppped.png',
    './bug/b_bigbug0011_croppped.png',
    './bug/b_bigbug0012_croppped.png',
]
def detect_prec(img):
    """Return a [0, 1] per-pixel sharpness map of a BGR image.

    Uses the module-level Laplacian kernel `g` defined earlier: sharp
    (high-frequency) areas produce large absolute filter responses.
    """
    img_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255.0
    img_edges = cv2.filter2D(img_grayscale, -1, g)
    return rescale_intensity(np.abs(img_edges))
def merge(bugs, bugs_prec):
    """Blend focus-stacked frames, weighting each pixel by local sharpness.

    bugs      -- list of BGR images of identical shape
    bugs_prec -- list of matching 2D sharpness maps (one weight per pixel)

    Each output pixel is the sharpness-weighted average of the input
    pixels, followed by an automatic gamma correction that moves the mean
    brightness (HSV value channel) towards mid-gray.

    Improvements over the previous version: the per-pixel Python triple
    loops are replaced with vectorized numpy expressions, and pixels where
    every sharpness weight is zero no longer divide by zero (they fall
    back to a uniform average of the frames).
    """
    stack = np.stack(bugs).astype(np.float64)                          # (N, H, W, 3)
    weights = np.stack(bugs_prec).astype(np.float64)[..., np.newaxis]  # (N, H, W, 1)
    weight_sum = weights.sum(axis=0)                                   # (H, W, 1)
    # Robustness: where all weights are zero, average uniformly instead of
    # dividing by zero.
    all_zero = weight_sum == 0
    weights = np.where(all_zero, 1.0, weights)
    weight_sum = np.where(all_zero, float(len(bugs)), weight_sum)
    merged = (stack * weights).sum(axis=0) / weight_sum
    merged_bugs = merged.astype(bugs[0].dtype)
    # Gamma correction: choose gamma so the mean of the V (brightness)
    # channel maps to mid-gray (0.5 * 255).
    hsv = cv2.cvtColor(merged_bugs, cv2.COLOR_BGR2HSV)
    hue, sat, val = cv2.split(hsv)
    mid = 0.5
    mean = np.mean(val)
    gamma = np.log(mid*255)/np.log(mean)
    print('\n===')
    print('The best gamma is ', gamma, sep = "")
    invGamma = 1.0 / gamma
    # Apply the gamma curve to every pixel at once (was a per-pixel loop).
    merged_bugs = (((merged_bugs / 255) ** invGamma) * 255).astype(bugs[0].dtype)
    return merged_bugs
# data loading: read the frames and shrink them to 30% for speed
bugs = [cv2.imread(f, 1) for f in files]
bugs = list(map(lambda i: cv2.resize(i, None, fx=0.3, fy=0.3), bugs))
bugs_prec = list(map(detect_prec, bugs))
# load the ground truth image
result = cv2.imread('./bug/result.png', 1)
result = cv2.resize(result, None, fx=0.3, fy=0.3)
print('\n===')
print('Pictures of ants with different sharpness at different distances\n')
imshow(np.concatenate(bugs[0:4], 1))
imshow(np.concatenate(bugs[4:8], 1))
imshow(np.concatenate(bugs[8:12], 1))
# per-frame sharpness maps, scaled to 0-255 for display
imshow((np.concatenate(bugs_prec[0:4], 1) * 255).astype(np.uint8))
imshow((np.concatenate(bugs_prec[4:8], 1) * 255).astype(np.uint8))
imshow((np.concatenate(bugs_prec[8:12], 1) * 255).astype(np.uint8))
bug_combined = merge(bugs, bugs_prec)
# naive baseline: plain per-pixel mean of all frames (yields a float image)
bug = np.stack(bugs, 0).mean(0)
print('\n===')
print('Normal averaging of the component images and the target image\n')
# NOTE(review): `bug` is float64 while `result` is uint8, so np.concatenate
# upcasts to float — confirm the display helper renders that correctly.
imshow(np.concatenate([result, bug], 1))
print('\n===')
print('The result of image reconstruction based on the detection of high-sharpness areas\n')
imshow(np.concatenate([result, bug_combined], 1))